{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5c589c8b-46f3-4df7-bd3b-6baae2a63bdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chains import ConversationalRetrievalChain\n",
    "from langchain.memory import ConversationBufferMemory\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_openai.embeddings import OpenAIEmbeddings\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "35a2fb84-2d95-4db8-b896-77422e8f938b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode the file explicitly as UTF-8 instead of relying on the platform's\n",
    "# default encoding (the questions asked later suggest the corpus is Chinese).\n",
    "loader = TextLoader(\"./demo2.txt\", encoding=\"utf-8\")\n",
    "docs = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1806cd71-6ed2-43a2-a3fa-a2d3be393243",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separators run from coarse to fine: line break, Chinese sentence enders,\n",
    "# Chinese clause punctuation, and finally the empty string.\n",
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    separators=[\"\\n\", \"。\", \"！\", \"？\", \"，\", \"、\", \"\"],\n",
    "    chunk_overlap=40,\n",
    "    chunk_size=500,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "bc3db8f3-bb5b-4484-9209-632dccfb9f68",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Break the loaded documents into chunks with the previously configured splitter.\n",
    "texts = text_splitter.split_documents(\n",
    "    docs,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "01597bf7-0851-4d22-830a-8427687cd12c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# No arguments are passed, so the embedding model and credentials fall back to\n",
    "# the library defaults (presumably the OPENAI_API_KEY environment variable - confirm).\n",
    "embeddings_model = OpenAIEmbeddings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3d1e1cf4-7623-4d09-87ce-77bae9057aca",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Build a FAISS vector store from the chunks using the embedding model.\n",
    "db = FAISS.from_documents(\n",
    "    texts,\n",
    "    embeddings_model,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7867aa87-9e97-44ae-bb9a-500d4cd6e080",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the vector store in a retriever; no search options are overridden here.\n",
    "retriever = db.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9e284b73-afe2-49b2-94ce-d2e8b0850051",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chat model passed as `llm` to every chain built in the later cells.\n",
    "model = ChatOpenAI(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d92e719b-9ec2-41fd-a337-c315f3e5cbb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'chat_history': [HumanMessage(content='卢浮宫这个名字怎么来的？'),\n",
       "  AIMessage(content='根据法国百科全书辞典大拉鲁斯百科全书的说法，卢浮宫这个名字来源于与狼狩猎巢穴的联系（拉丁语：lupus，下帝国： lupara）。')],\n",
       " 'question': '卢浮宫这个名字怎么来的？',\n",
       " 'answer': '根据法国百科全书辞典大拉鲁斯百科全书的说法，卢浮宫这个名字来源于与狼狩猎巢穴的联系（拉丁语：lupus，下帝国： lupara）。'}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Experiment 1: answer the question with the \"map_reduce\" chain type.\n",
    "# A fresh memory is created so this run starts with an empty history.\n",
    "memory = ConversationBufferMemory(return_messages=True,\n",
    "                                  memory_key='chat_history',\n",
    "                                  output_key='answer')\n",
    "qa = ConversationalRetrievalChain.from_llm(\n",
    "    llm=model,\n",
    "    retriever=retriever,\n",
    "    memory=memory,\n",
    "    chain_type=\"map_reduce\"\n",
    ")\n",
    "# The memory attached to the chain supplies `chat_history`, so only the\n",
    "# question is passed; the memory object itself is not a valid history value.\n",
    "qa.invoke({\"question\": \"卢浮宫这个名字怎么来的？\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "17f1ab10-3f9d-4bfb-bed0-44ad334d0560",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'chat_history': [HumanMessage(content='卢浮宫这个名字怎么来的？'),\n",
       "  AIMessage(content='根据提供的新背景信息，卢浮宫这个名字来源于与狼狩猎巢穴的联系（拉丁语：lupus，下帝国： lupara）。卢浮宫是一座位于法国巴黎市中心的塞纳河边的博物馆，原是建于12世纪末至13世纪初的王宫，现在是一所世界上最大的艺术博物馆之一，也是参观人数最多的博物馆之一。1793年8月10日，正值君主制灭亡一周年，卢浮宫正式命名为“中央艺术博物馆”开放。在大革命期间，卢浮宫展示了来自皇室、教堂、贵族和地方政府等处没收来的艺术品。后来，为了扩大收藏，共和国每年投入约100,000里弗维持博物馆的经营，并开始从欧洲各地和梵蒂冈等地带来作品。在拿破仑时代，卢浮宫继续展示各种珍贵的艺术品，成为了法国艺术和文化的重要中心。')],\n",
       " 'question': '卢浮宫这个名字怎么来的？',\n",
       " 'answer': '根据提供的新背景信息，卢浮宫这个名字来源于与狼狩猎巢穴的联系（拉丁语：lupus，下帝国： lupara）。卢浮宫是一座位于法国巴黎市中心的塞纳河边的博物馆，原是建于12世纪末至13世纪初的王宫，现在是一所世界上最大的艺术博物馆之一，也是参观人数最多的博物馆之一。1793年8月10日，正值君主制灭亡一周年，卢浮宫正式命名为“中央艺术博物馆”开放。在大革命期间，卢浮宫展示了来自皇室、教堂、贵族和地方政府等处没收来的艺术品。后来，为了扩大收藏，共和国每年投入约100,000里弗维持博物馆的经营，并开始从欧洲各地和梵蒂冈等地带来作品。在拿破仑时代，卢浮宫继续展示各种珍贵的艺术品，成为了法国艺术和文化的重要中心。'}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Experiment 2: answer the same question with the \"refine\" chain type.\n",
    "# A fresh memory is created so this run starts with an empty history.\n",
    "memory = ConversationBufferMemory(return_messages=True,\n",
    "                                  memory_key='chat_history',\n",
    "                                  output_key='answer')\n",
    "qa = ConversationalRetrievalChain.from_llm(\n",
    "    llm=model,\n",
    "    retriever=retriever,\n",
    "    memory=memory,\n",
    "    chain_type=\"refine\"\n",
    ")\n",
    "# The memory attached to the chain supplies `chat_history`, so only the\n",
    "# question is passed; the memory object itself is not a valid history value.\n",
    "qa.invoke({\"question\": \"卢浮宫这个名字怎么来的？\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c6106cf5-7ecb-40cc-be03-eae03de1aa22",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.11/site-packages/langchain/chains/llm.py:344: UserWarning: The apply_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'chat_history': [HumanMessage(content='卢浮宫这个名字怎么来的？'),\n",
       "  AIMessage(content='这个名字来源于与狼狩猎巢穴的联系')],\n",
       " 'question': '卢浮宫这个名字怎么来的？',\n",
       " 'answer': '这个名字来源于与狼狩猎巢穴的联系'}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Experiment 3: answer the same question with the \"map_rerank\" chain type.\n",
    "# A fresh memory is created so this run starts with an empty history.\n",
    "memory = ConversationBufferMemory(return_messages=True,\n",
    "                                  memory_key='chat_history',\n",
    "                                  output_key='answer')\n",
    "qa = ConversationalRetrievalChain.from_llm(\n",
    "    llm=model,\n",
    "    retriever=retriever,\n",
    "    memory=memory,\n",
    "    chain_type=\"map_rerank\"\n",
    ")\n",
    "# The memory attached to the chain supplies `chat_history`, so only the\n",
    "# question is passed; the memory object itself is not a valid history value.\n",
    "qa.invoke({\"question\": \"卢浮宫这个名字怎么来的？\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e939b5d4-c15d-47d7-85ad-4d3d7ea3795e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
